\select@language {english}
\contentsline {chapter}{Nomenclature}{iv}{chapter*.2}
\contentsline {chapter}{\numberline {1}Introduction}{1}{chapter.1}
\contentsline {section}{\numberline {1.1}ASR System Architecture}{2}{section.1.1}
\contentsline {section}{\numberline {1.2}Mathematical Formulation of the ASR Problem}{5}{section.1.2}
\contentsline {section}{\numberline {1.3}Challenges in ASR Technology}{6}{section.1.3}
\contentsline {section}{\numberline {1.4}Motivation}{7}{section.1.4}
\contentsline {chapter}{\numberline {2}Automatic Speech Recognition}{8}{chapter.2}
\contentsline {section}{\numberline {2.1}Hidden Markov Models}{8}{section.2.1}
\contentsline {section}{\numberline {2.2}Basic Problems for HMM}{10}{section.2.2}
\contentsline {subsection}{\numberline {2.2.1}Likelihood: Forward Algorithm}{10}{subsection.2.2.1}
\contentsline {subsection}{\numberline {2.2.2}Decoding: Viterbi Algorithm}{13}{subsection.2.2.2}
\contentsline {subsection}{\numberline {2.2.3}Training: The Baum-Welch Algorithm}{14}{subsection.2.2.3}
\contentsline {section}{\numberline {2.3}Acoustic Likelihood Computation}{16}{section.2.3}
\contentsline {subsection}{\numberline {2.3.1}Gaussian Mixture Models}{16}{subsection.2.3.1}
\contentsline {subsection}{\numberline {2.3.2}Deep Neural Networks}{17}{subsection.2.3.2}
\contentsline {subsubsection}{\numberline {2.3.2.1}Generative Pre-training}{18}{subsubsection.2.3.2.1}
\contentsline {subsubsection}{\numberline {2.3.2.2}Discriminative Finetuning}{19}{subsubsection.2.3.2.2}
\contentsline {chapter}{\numberline {3}Minimizing Speaker Variability}{20}{chapter.3}
\contentsline {section}{\numberline {3.1}Speaker Variability in GMM-HMM Systems}{21}{section.3.1}
\contentsline {subsection}{\numberline {3.1.1}Model-based Schemes}{21}{subsection.3.1.1}
\contentsline {subsubsection}{\numberline {3.1.1.1}Maximum a Posteriori (MAP) Adaptation}{21}{subsubsection.3.1.1.1}
\contentsline {subsubsection}{\numberline {3.1.1.2}Maximum Likelihood Linear Regression (MLLR)}{22}{subsubsection.3.1.1.2}
\contentsline {paragraph}{\numberline {3.1.1.2.1}Standard MLLR}{22}{paragraph.3.1.1.2.1}
\contentsline {paragraph}{\numberline {3.1.1.2.2}Constrained MLLR (CMLLR)}{23}{paragraph.3.1.1.2.2}
\contentsline {paragraph}{\numberline {3.1.1.2.3}Speaker Adaptive Training (SAT)}{23}{paragraph.3.1.1.2.3}
\contentsline {subsubsection}{\numberline {3.1.1.3}Clustering}{23}{subsubsection.3.1.1.3}
\contentsline {paragraph}{\numberline {3.1.1.3.1}Cluster Adaptive Training}{24}{paragraph.3.1.1.3.1}
\contentsline {paragraph}{\numberline {3.1.1.3.2}Eigenvoices}{25}{paragraph.3.1.1.3.2}
\contentsline {subsection}{\numberline {3.1.2}Feature-based Schemes}{25}{subsection.3.1.2}
\contentsline {subsubsection}{\numberline {3.1.2.1}Cepstral Mean Normalization (CMN) and Cepstral Variance Normalization (CVN)}{25}{subsubsection.3.1.2.1}
\contentsline {subsubsection}{\numberline {3.1.2.2}Vocal Tract Length Normalization (VTLN)}{25}{subsubsection.3.1.2.2}
\contentsline {subsubsection}{\numberline {3.1.2.3}Feature-space MLLR (FMLLR)}{26}{subsubsection.3.1.2.3}
\contentsline {section}{\numberline {3.2}Speaker Variability in DNN-HMM Systems}{27}{section.3.2}
\contentsline {subsection}{\numberline {3.2.1}Feature-based Schemes}{27}{subsection.3.2.1}
\contentsline {subsection}{\numberline {3.2.2}Model-based Schemes}{27}{subsection.3.2.2}
\contentsline {subsubsection}{\numberline {3.2.2.1}Retraining}{28}{subsubsection.3.2.2.1}
\contentsline {paragraph}{\numberline {3.2.2.1.1}Retraining the Entire Network}{28}{paragraph.3.2.2.1.1}
\contentsline {paragraph}{\numberline {3.2.2.1.2}Retraining the Last Layer}{28}{paragraph.3.2.2.1.2}
\contentsline {paragraph}{\numberline {3.2.2.1.3}Conservative Training}{29}{paragraph.3.2.2.1.3}
\contentsline {subsubsection}{\numberline {3.2.2.2}Speaker Input as a Speaker Dependent Bias}{30}{subsubsection.3.2.2.2}
\contentsline {paragraph}{\numberline {3.2.2.2.1}Augmenting Features With I-Vectors \citep {IVECT,IVECT1}}{30}{paragraph.3.2.2.2.1}
\contentsline {paragraph}{\numberline {3.2.2.2.2}Augmenting Features With MLP Factors \citep {MLP_FACTORS}}{31}{paragraph.3.2.2.2.2}
\contentsline {paragraph}{\numberline {3.2.2.2.3}Speaker Normalization Using Explicit Speaker Representations \citep {NORMDNN}}{31}{paragraph.3.2.2.2.3}
\contentsline {subsubsection}{\numberline {3.2.2.3}Speaker Code as a Speaker Dependent Bias}{31}{subsubsection.3.2.2.3}
\contentsline {paragraph}{\numberline {3.2.2.3.1}Speaker Code With the Adaptation Network \citep {SPEAKECODE1}}{32}{paragraph.3.2.2.3.1}
\contentsline {paragraph}{\numberline {3.2.2.3.2}Speaker Code Based Approach for Direct Adaptation in the Model Space \citep {SPEAKECODE2}}{33}{paragraph.3.2.2.3.2}
\contentsline {subsubsection}{\numberline {3.2.2.4}Augmenting the Model}{34}{subsubsection.3.2.2.4}
\contentsline {paragraph}{\numberline {3.2.2.4.1}Linear Input Network (LIN)}{34}{paragraph.3.2.2.4.1}
\contentsline {paragraph}{\numberline {3.2.2.4.2}Linear Hidden Network (LHN)}{35}{paragraph.3.2.2.4.2}
\contentsline {paragraph}{\numberline {3.2.2.4.3}Linear Output Network (LON)}{36}{paragraph.3.2.2.4.3}
\contentsline {paragraph}{\numberline {3.2.2.4.4}Subspace Method}{36}{paragraph.3.2.2.4.4}
\contentsline {chapter}{\numberline {4}Preliminary Results and Future Work}{37}{chapter.4}
\contentsline {section}{\numberline {4.1}LHN-based Speaker Adaptation}{38}{section.4.1}
\contentsline {subsection}{\numberline {4.1.1}Database Information}{38}{subsection.4.1.1}
\contentsline {subsection}{\numberline {4.1.2}Speaker Independent Model Information}{39}{subsection.4.1.2}
\contentsline {subsection}{\numberline {4.1.3}Eigenvoice Based Adaptation}{40}{subsection.4.1.3}
\contentsline {section}{\numberline {4.2}Measuring the Speaker Variability of a DNN}{43}{section.4.2}
\contentsline {subsection}{\numberline {4.2.1}Entropy Based Measure}{43}{subsection.4.2.1}
\contentsline {subsection}{\numberline {4.2.2}Variance Based Measure}{45}{subsection.4.2.2}
\contentsline {subsection}{\numberline {4.2.3}Discussion}{48}{subsection.4.2.3}
\contentsline {chapter}{\numberline {5}Conclusion and Future Work}{49}{chapter.5}
\contentsline {section}{\numberline {5.1}Conclusion}{49}{section.5.1}
\contentsline {section}{\numberline {5.2}Future Work}{49}{section.5.2}
\contentsline {chapter}{References}{54}{chapter*.3}
